{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据标准化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from ML.knn import kNN_classify\n",
    "from ML.model_selection import train_test_split\n",
    "from ML.metrics import accuracy_score\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 有两个品种的树苗\n",
    "# 树苗的直径cm    生长天数\n",
    "\n",
    "X = np.array([\n",
    "    [1.0, 100],\n",
    "    [1.1, 200],\n",
    "    [0.9, 150],\n",
    "    [0.2, 190],\n",
    "    [1.0, 100],\n",
    "    [1.1, 200],\n",
    "    [0.7, 150],\n",
    "    [0.2, 190],\n",
    "    [2.1, 250],\n",
    "    [1.8, 220],\n",
    "    [2.2, 290],\n",
    "    [1.9, 270],\n",
    "    [2.1, 390],\n",
    "    [1.8, 220],\n",
    "    [2.2, 258],\n",
    "    [1.9, 360]\n",
    "])\n",
    "y = np.array([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAFVFJREFUeJzt3X+MXeV95/H3F9sldZM1Pzyb9frXkNariJQWyF2WJtEuha1CnFKItu1SuQnJsppkJ1klouoGamkbIllNpW1hoy6WpiEbkEZJEckukBDtUkNUdVOg48hgfjSJQzDYojBNwSmySmHy3T/uM+5lMuN778z9Mffx+yVd3XOe85x7vnPmzMfH55x7TmQmkqR6nTbsAiRJ/WXQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekiq3dtgFAGzcuDHHx8eHXYYkjZT9+/f/TWaOtevXcdBHxBpgBjiamb8cEecAXwLOBvYD78/Mf4iI04HbgbcDPwD+fWY+fbLPHh8fZ2ZmptNSJElARBzupF83h24+DjzZMv77wE2Z+TPAi8C1pf1a4MXSflPpJ0kako6CPiK2AO8FPlfGA7gUuLN0uQ24qgxfWcYp0y8r/SVJQ9DpHv3NwH8BflTGzwZeyszXyvgRYHMZ3gw8C1CmHyv9XyciJiJiJiJmZmdnl1m+JKmdtkEfEb8MvJCZ+3u54MycysxGZjbGxtqeS5AkLVMnJ2PfCfxKROwE3gD8E+C/A2dExNqy174FOFr6HwW2AkciYi2wgeZJWUnSELTdo8/MGzJzS2aOA1cD92fmLuAB4FdLt2uAu8rw3WWcMv3+9OkmkjQ0K/nC1CeB6yLiEM1j8LeW9luBs0v7dcD1KytRklaX6YPTjN88zmk3nsb4zeNMH5wedkkn1dUXpjLzG8A3yvBTwEWL9Pl74Nd6UJskrTrTB6eZuGeC468eB+DwscNM3DMBwK7zdg2ztCV5CwRJ6sLufbtPhPy8468eZ/e+3UOqqD2DXpK68MyxZ7pqXw0MeknqwrYN27pqXw0Meknqwp7L9rB+3frXta1ft549l+0ZUkXtGfSS1IVd5+1i6ooptm/YThBs37CdqSumVu2JWIBYDZe4NxqN9O6VktSdiNifmY12/dyjl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL2lkjdpDuoelq4eDS9JqMYoP6R4W9+gljaRRfEj3sBj0kkbSKD6ke1gMekkjaRQf0j0sBr2kkTSKD+kelrZBHxFviIiHI+KRiHg8Im4s7V+IiO9HxIHyOr+0R0R8NiIORcSjEXFhv38ISaeeUXxI97B0ctXNK8ClmflyRKwD/jwivl6m/XZm3rmg/3uAHeX1r4C95V2SemrXebsM9g603aPPppfL6LryypPMciVwe5nvQeCMiNi08lIlScvR0TH6iFgTEQeAF4D7MvOhMmlPOTxzU0ScXto2A8+2zH6ktC38zImImImImdnZ2RX8CJKkk+ko6DNzLjPPB7YAF0XEzwI3AG8F/iVwFvDJbhacmVOZ2cjMxtjYWJdlS5I61dVVN5n5EvAAcHlmPlcOz7wC/E/gotLtKLC1ZbYtpU2SNASdXHUzFhFnlOGfBH4J+Kv54+4REcBVwGNllruBD5Srby4GjmXmc32pXpLUVidX3WwCbouINTT/YbgjM78aEfdHxBgQwAHgI6X/vcBO4BBwHPhQ78uWJHWqbdBn5qPABYu0X7pE/wQ+uvLSJEm94DdjJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalybYM+It4QEQ9HxCMR8XhE3Fjaz4mIhyLiUET8SUT8RGk/vYwfKtPH+/sjSJJOppM9+leASzPz54Hzgcsj4mLg94GbMvNngBeBa0v/a4EXS/tNpZ8kaUjaBn02vVxG15VXApcCd5b224CryvCVZZwy/bKIiJ5VLEnqSkfH6CNiTUQcAF4A7gO+B7yUma+VLkeAzWV4M/AsQJl+DDh7kc+ciIiZiJiZnZ1d2U8hSVpSR0GfmXOZeT6wBbgIeOtKF5yZU5nZyMzG2NjYSj9OkrSErq66ycyXgAeAXwDOiIi1ZdIW4GgZPgpsBSjTNwA/6Em1kqSudXLVzVhEnFGGfxL4JeBJmoH/q6XbNcBdZfjuMk6Zfn9mZi+LliR1rpM9+k3AAxHxKPCXwH2Z+VXgk8B1EXGI5jH4W0v/W4GzS/t1wPW9L1uSRtP0wWnGbx7ntBtPY/zmcaYPTvd9mWvbdcjMR4ELFml/iubx+oXtfw/8Wk+qk6SKTB+cZuKeCY6/ehyAw8cOM3HPBAC7ztvVt+X6zVhJGpDd+3afCPl5x189zu59u/u6XINekgbkmWPPdNXeKwa9JA3Itg3bumrvFYNekgZkz2V7WL9u/eva1q9bz57L9vR1uQa9JA3IrvN2MXXFFNs3bCcItm/YztQVU309EQsQq+ES90ajkTMzM8MuQ5JGSkTsz8xGu37u0UtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSRtYw7gQ5itrevVKSVqNh3QlyFLlHL2kkDetOkKPIoJc0koZ1J8hRZNBLGknDuhPkKDLoJY2kYd0JchQZ9JJG0rDuBDmKvHulJI0o714pSavQMK799zp6SRqQYV377x69JA3IsK79N+glaUCGde2/QS9JAzKsa//bBn1EbI2IByLiiYh4PCI+Xto/FRFHI+JAee1smeeGiDgUEd+OiHf38weQpEFb7gnVYV3738nJ2NeA38rMb0XEm4D9EXFfmXZTZv631s4RcS5wNfA24J8DfxoR/yIz53pZuCQNw0pOqM5P371vN88ce4ZtG7ax57I9fb/2v+vr6CPiLuCPgHcCLy8S9DcAZObvlfH/A3wqM/9iqc/0OnpJo2L85nEOHzv8Y+3bN2zn6U88PdBa+nIdfUSMAxcAD5Wmj0XEoxHx+Yg4s7RtBp5tme1IaVv4WRMRMRMRM7Ozs92UIUlDM4o3U+s46CPijcCXgU9k5g+BvcBPA+cDzwF/0M2CM3MqMxuZ2RgbG+tmVkkamlG8mVpHQR8R62iG/HRmfgUgM5/PzLnM/BHwx8BFpftRYGvL7FtKmySNvFG8mVonV90EcCvwZGb+YUv7ppZu7wMeK8N3A1dHxOkRcQ6wA3i4dyVL0vCM4s3UOrnq5p3A+4GDEXGgtP0O8BsRcT6QwNPAhwEy8/GIuAN4guYVOx/1ihtJNdl13q5VHewLtQ36zPxzIBaZdO9J5tkDrN7/x0jSKcRvxkpS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9pKGa/Nokaz+9lrgxWPvptUx+bXLYJVWnk9sUS1JfTH5tkr0ze0+Mz+XcifFb3nvLsMqqjnv0koZmav9UV+1aHoNe0tDMLfFMoqXatTwGvaShWRNrumrX8hj0koZm4u0TXbVreTwZK2lo5k+4Tu2fYi7nWBNrmHj7hCdieywyc9g10Gg0cmZmZthlSNJIiYj9mdlo189DN5JUOYNekipn0EtS5Qx6Sapc26CPiK0R8UBEPBERj0fEx0v7WRFxX0R8t7yfWdojIj4bEYci4tGIuLDfP4QkaWmd7NG/BvxWZp4LXAx8NCLOBa4H9mXmDmBfGQd4D7CjvCaAvT/+kdKATE7C2rUQ0Xyf9IZZOvW0DfrMfC4zv1WG/w54EtgMXAncVrrdBlxVhq8Ebs+mB4EzImJTzyuX2pmchL17Ya58nX5urjlu2OsU09Ux+ogYBy4AHgLenJnPlUl/Dby5DG8Gnm2Z7UhpkwZraokbYy3VLlWq46CPiDcCXwY+kZk/bJ2WzW9ddfXNq4iYiIiZiJiZnZ3tZlapM3NL3BhrqXapUh0FfUSsoxny05n5ldL8/PwhmfL+Qmk/CmxtmX1LaXudzJzKzEZmNsbGxpZbv7S0NUvcGGupdqlSnVx1E8CtwJOZ+Yctk+4GrinD1wB3tbR/oFx9czFwrOUQjzQ4E0vcGGupdqlSndzU7J3A+4GDEXGgtP0O8Bngjoi4FjgM/HqZdi+wEzgEHAc+1NOKpU7dUm6MNTXVPFyzZk0z5G/xhlk6tYz+Tc0mJ/1DlnRK6vSmZqN9m+L5y+fmzV8+B4a9JBWjfQsEL5+TpLZGO+i9fE6S2hrtoPfyOUlqa7SD3svnJKmt0T4Z6+VzktTWaAc9NEPdYJekJY32oRtJUlsGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFWubdBHxOcj4oWIeKyl7VMRcTQiDpTXzpZpN0TEoYj4dkS8u1+FS5I608ke/ReAyxdpvykzzy+vewEi4lzgauBtZZ5bImJNr4rVCk1Pw/g4nHZa8316uu7lSgI6eDh4Zv5ZRIx3+HlXAl/KzFeA70fEIeAi4C+WXaF6Y3oaJibg+PHm+OHDzXGAXbvqW66kE1ZyjP5jEfFoObRzZmnbDDzb0udIadOw7d79j2E77/jxZnuNy5V0wnKDfi/w08D5wHPAH3T7ARExEREzETEzOzu7zDLUsWee6a591Jcr6YRlBX1mPp+Zc5n5I+CPaR6eATgKbG3puqW0LfYZU5nZyMzG2NjYcspQN7Zt66591Jcr6YRlBX1EbGoZfR8wf0XO3cDVEXF6RJwD7AAeXlmJ6ok9e2D9+te3rV/fbK9xuZJOaHsyNiK+CFwCbIyII8DvApdExPlAAk8DHwbIzMcj4g7gCeA14KOZOdef0tWV+ROfu3c3D5ts29YM236fEB3WciWdEJk57BpoNBo5MzMz7DIkaaRExP7MbLTr5zdjJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalybYM+Ij4fES9ExGMtbWdFxH0R8d3yfmZpj4j4bEQciohHI+LCfhYvSWqvkz36LwCXL2i7HtiXmTuAfWUc4D3AjvKaAPb2pkydsiYnYe1aiGi+T04OuyJp5LQN+sz8M+BvFzRfCdxWhm8Drmppvz2bHgTOiIhNvSpWp5jJSdi7F+bmmuNzc81xw17qynKP0b85M58rw38NvLkMbwaebel3pLRJ3Zua6q5d0qJWfDI2MxPIbueLiImImImImdnZ2ZWWoRrN78l32i5pUcsN+ufnD8mU9xdK+1Fga0u/LaXtx2TmVGY2MrMxNja2zDJUtTVrumuXtKjlBv3dwDVl+Brgrpb2D5Srby4GjrUc4pG6MzHRXbukRa1t1yEivghcAmyMiCPA7wKfAe6IiGuBw8Cvl+73AjuBQ8Bx4EN9qFmniltuab5PTTUP16xZ0wz5+XZJHYnmIfbhajQaOTMzM+wyJGmkRMT+zGy06+c3YyWpcga9JFXOoJekyhn0klQ5g16SKrcqrrqJiFmal2n2y0bgb/r4+ctlXZ1bjTWBdXVrNda1GmuCzuranpltv3G6KoK+3yJippNLkAbNujq3GmsC6+rWaqxrNdYEva3LQzeSVDmDXpIqd6oE/Wq9r611dW411gTW1a3VWNdqrAl6WNcpcYxekk5lp8oevSSdskY+6CPi8oj4dnkg+fWLTL8uIp4oDyvfFxHbW6bNRcSB8rp7wHV9MCJmW5b/H1umXVMevP7diLhm4bx9rOmmlnq+ExEvtUzry7pa7OHzC6Yv+cD5fq2nDuvaVeo5GBHfjIifb5n2dGk/EBE9vVtfB3VdEhHHWn5X/7Vl2kl//32s6bdb6nmsbEtnlWn9XFdbI+KB8vf/eER8fJE+A9++Oqyrt9tXZo7sC1gDfA94C/ATwCPAuQv6/CKwvgz/J+BPWqa9PMS6Pgj80SLzngU8Vd7PLMNnDqKmBf3/M/D5Aayrfw1cCDy2xPSdwNeBAC4GHurneuqirnfMLw94z3xdZfxpYOOQ1tclwFdX+vvvZU0L+l4B3D+gdbUJuLAMvwn4ziJ/hwPfvjqsq6fb16jv0V8EHMrMpzLzH4Av0XxA+QmZ+UBmHi+jD9J86tXQ6zqJdwP3ZebfZuaLwH3A5UOo6TeAL/ZguSeViz98vtVSD5zv13rqqK7M/GZZLgxuu+pkfS1lJdtkL2sayHYFkJnPZea3yvDfAU/y48+wHvj21Uldvd6+Rj3ou30Y+bU0//We94ZoPrf2wYi4agh1/bvy37M7I2L+EYz9esB6x59bDm+dA9zf0tyvddXOUnWvpgfRL9yuEvi/EbE/IobxOKxfiIhHIuLrEfG20jb09RUR62mG5ZdbmgeyriJiHLgAeGjBpKFuXyepq9WKt6+2T5iqRUT8JtAA/k1L8/bMPBoRbwHuj4iDmfm9AZV0D/DFzHwlIj4M3AZcOqBlt3M1cGdmtj6Fe5jratWKiF+k+Yf4rpbmd5V19U+B+yLir8pe7yB8i+bv6uWI2An8b2DHgJbdzhXA/8vM1r3/vq+riHgjzX9cPpGZP+zlZ69EJ3X1avsa9T36jh5GHhH/FtgN/EpmvjLfnplHy/tTwDdo/ss6kLoy8wcttXwOeHun8/arphZXs+C/131cV+0sVXe/1lPHIuLnaP7urszMH8y3t6yrF4D/RfOwyUBk5g8z8+UyfC+wLiI2sgrWFyffrvqyriJiHc0wnc7MryzSZSjbVwd19Xb76sXJhWG9aP6P5CmahxnmTzC9bUGfC2iehNqxoP1M4PQyvBH4Lr07OdVJXZtaht8HPJj/eBLo+6W+M8vwWYOoqfR7K82TPTGIdVU+c5ylTy6+l9efLHu4n+upi7q20Xw28jsWtP8U8KaW4W8Clw+wrn82/7srAfBMWXcd/f77UVOZvoHmcfyfGtS6Kj/37cDNJ+kz8O2rw7p6un31bOMb1ovmWfPv0Azz3aXt0zT33gH+FHgeOFBed5f2dwAHywZ/ELh2wHX9HvB4Wf4DwFtb5v0P5Zd8CPjQoGoq458CPrNgvr6tK5p7eM8Br9I8Dnot8BHgI2V6AP+j1HwQaPR7PXVY1+eAF1u2q5nS/paynh4pv9/dA67rYy3b1YOtQbHY738QNZU+HwS+tGC+fq+rd9E8nv1oy+9p57C3rw7r6un25TdjJalyo36MXpLUhkEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1Ll/j+92c18JpbZFAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.scatter(X[y == 0, 0], X[y == 0, 1], color='r')\n",
    "plt.scatter(X[y == 1, 0], X[y == 1, 1], color='g')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.5"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, seed=100)\n",
    "y_predict = kNN_classify(X_train, y_train, X_test)\n",
    "accuracy_score(y_test, y_predict)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "从上面的结果可知，knn预测出来的准确率非常低。\n",
    "原因是 样本不同特征之间他们的量纲不一样，会严重地影响到机器学习的效果"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 为什么需要对数据进行标准化：\n",
    "样本不同特征之间他们的量纲不一样，会严重地影响到机器学习的效果\n",
    "\n",
    "### 如何对数据进行标准化：\n",
    "将数据按照比例进行缩放，使之落入一个小的特定空间之内\n",
    "\n",
    "### 0均值标准化：\n",
    "使之落入一个均值为0，标准差为1的这样一个范围内\n",
    "\n",
    "#### 标准化并不会让数据的分布发生变化，但是坐标值变小了，去除了量纲对我们算法的影响"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1. , 1.1, 0.9, 0.2, 1. , 1.1, 0.7, 0.2, 2.1, 1.8, 2.2, 1.9, 2.1,\n",
       "       1.8, 2.2, 1.9])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X[:, 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "X[:, 0] = (X[:, 0] - np.mean(X[:, 0])) / np.std(X[:, 0])       # 进行0均值标准化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-0.57775121, -0.42865412, -0.72684829, -1.77052789, -0.57775121,\n",
       "       -0.42865412, -1.02504246, -1.77052789,  1.06231674,  0.61502548,\n",
       "        1.21141382,  0.76412256,  1.06231674,  0.61502548,  1.21141382,\n",
       "        0.76412256])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X[:, 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "9.71445146547012e-17"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.mean(X[:, 0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9999999999999999"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.std(X[:, 0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "X[:, 1] = (X[:, 1] - np.mean(X[:, 1])) / np.std(X[:, 1])       # 进行0均值标准化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-1.53655713, -0.26798571, -0.90227142, -0.39484285, -1.53655713,\n",
       "       -0.26798571, -0.90227142, -0.39484285,  0.3663    , -0.01427143,\n",
       "        0.87372856,  0.62001428,  2.14229998, -0.01427143,  0.46778571,\n",
       "        1.76172855])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X[:, 1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAExRJREFUeJzt3W+MXFd9xvHn8a4TZEAm4CUxjtebCKttqpR/oxRKX4Q6oCRtYv5KiVYlQUFTWFJUqX0RdaUiIq0KfVECBZuOQkRSrRJo2jSO4jYkDlGKqkDGyGHjuCbGih0bg5cEmSIjiJdfX8y1M97M7M7uvTt3Zs73I43m3jPHc87d9c4z995z73FECACQnlVldwAAUA4CAAASRQAAQKIIAABIFAEAAIkiAAAgUQQAACSKAACARBEAAJCo4bI7sJB169bF2NhY2d0AgL6xe/fun0XESCd1ezoAxsbGVK/Xy+4GAPQN24c6rcshIABIFAEAAIkiAAAgUQQAACSKAACARBEAAJAoAgAACjI9M62x28a06rOrNHbbmKZnpsvu0oJ6+joAAOgX0zPTqj5Q1cmXTkqSDp04pOoDVUnS+KXjZXatLfYAAKAAk7smz3z4n3bypZOa3DVZUo8WRwAAQAEOnzi8pPJeQAAAQAFG144uqbwXEAAAUICpLVNas3rNWWVrVq/R1Japknq0OAIAAAowfum4atfUtGntJlnWprWbVLum1rMngCXJEZHvDeyNku6SdL6kkFSLiC/Oq2NJX5R0taSTkm6MiO8v9t6VSiW4GygAdM727oiodFK3iGGgpyT9dUR83/ZrJe22/XBEPNNU5ypJm7PHH0ranj0DAEqS+xBQRBw7/W0+Iv5P0j5JG+ZV2yrprmh4QtLrbK/P2zYAYPkKPQdge0zS2yR9d95LGyQ937R+RK8MCQBAFxUWALZfI+nfJP1VRPwix/tUbddt12dnZ4vqHgBgnkICwPZqNT78pyPi31tUOSppY9P6hVnZK0RELSIqEVEZGeloWksAwDLkDoBshM/XJO2LiH9sU22HpI+64Z2STkTEsbxtAwCWr4hRQO+W9OeSZmzvycr+VtKoJEXEVyXtVGMI6AE1hoF+rIB2AQA55A6AiPiOJC9SJyR9Km9bAIDicCUwACSKAACARBEAAJAoAgAAEkUAAECiCAAAA6XfJmYvE5PCAxgY/Tgxe5nYAwAwMPpxYvYyEQAABkY/TsxeJgIAwMDox4nZy0QAABgY/Tgxe5kIAAADox8nZi9T7knhVxKTwgPA0ixlUnj2AAAgUQQAACSKAACARBEAAJAoAgAAElVIANi+w/Zx20+3ef1y2yds78kef1dEuwCA5SvqZnBfl/RlSXctUOe/I+LPCmoPAJBTIXsAEfG4pBeLeC8AQHd08xzAu2w/Zfs/bf9+u0q2q7brtuuzs7Nd7B4ApKVbAfB9SZsi4i2S/knSf7SrGBG1iKhERGVkZKRL3QOA9HQlACLiFxHxy2x5p6TVttd1o20AQGtdCQDbF9h2tnxZ1u4L3WgbANBaIaOAbN8t6XJJ62wfkfQZSaslKSK+KunDkj5p+5SkX0m6Lnr5LnQAkIBCAiAirl/k9S+rMUwUANAjuBIYABJFAABAoggAAEgUAQAAiSIAACBRBAAAJIoAAIBEEQAAkCgCAAASRQAAQKIIAABIFAEAAIkiAAAgUQQAACSKAACARBEAAJAoAgAAElVIANi+w/Zx20+3ed22v2T7gO0f2H57Ee0CAJavqD2Ar0u6coHXr5K0OXtUJW0vqF0AwDIVEgAR8bikFxeoslXSXdHwhKTX2V5fRNsAgOXp1jmADZKeb1o/kpUBAErScyeBbVdt123XZ2dny+4OAAysbgXAUUkbm9YvzMpeISJqEVGJiMrIyEhXOgcAKepWAOyQ9NFsNNA7JZ2IiGNdahsA0MJwEW9i+25Jl0taZ/uIpM9IWi1JEfFVSTslXS3pgKSTkj5WRLsAgOUrJAAi4vpFXg9JnyqiLQAYRNMz05rcNanDJw5rdO2oprZMafzS8RVts5AAAAAs3/TMtKoPVHXypZOSpEMnDqn6QFWSVjQEem4UEACkZnLX5JkP/9NOvnRSk7smV7RdAgAASnb4xOEllReFAACAko2uHV1SeVEIAAAo2dSWKa1ZveassjWr12hqy9SKtksAAEDJxi8dV+2amjat3STL2rR2k2rX1FZ8FJAbIzR7U6VSiXq9XnY3AKBv2N4dEZVO6rIHAACJIgAAIFEEAAAkigAAgEQRAACQKAIAwECZnpnW2G1jWvXZVRq7bUzTM9Nld6lncTM4AAOjrJuq9Sv2AAAMjLJuqtavCAAAA6Osm6r1KwIAwMAo66Zq/YoAADAwyrqpWr8qJABsX2l7v+0Dtm9p8fqNtmdt78keHy+iXQBoVtZN1fpV7pvB2R6S9ENJ75V0RNKTkq6PiGea6twoqRIRNy/lvbkZHAAsTbdvBneZpAMRcTAifiPpHklbC3hfAEhGGdcvFBEAGyQ937R+JCub70O2f2D7XtsbC2gXAAbC6esXDp04pFCcuX5hpUOgWyeBH5A0FhF/IOlhSXe2q2i7artuuz47O9ul7gFAefp5Uvijkpq/0V+YlZ0RES9ExK+z1dslvaPdm0VELSIqEVEZGRkpoHsA0Nv6eVL4JyVttn2R7XMkXSdpR3MF2+ubVq+VtK+AdgFgIPTtpPARcUrSzZIeUuOD/ZsRsdf2rbavzap92vZe209J+rSkG/O2CwCDoqzrF5gTGAAKMj0zrcldkzp84rBG145qastUx9cg5Pm3zZYyDJQAAIACzL8TqdT4Ft/tC9GYFB4Auqwf70RKAABAAfrxTqQEAAAUoB/vREoAAEAB+vFOpAQAABSgH+9EyiggABggjAICACyKAACARBEAAJAoAgAAEkUAAECiCAAASBQBAACJIgAAIFEEAAAkigAAgEQRAACQqEICwPaVtvfbPmD7lhavn2v7G9nr37U9VkS7AIDlyx0AtockfUXSVZIukXS97UvmVbtJ0s8j4s2SviDp83nbBTC4Jh6c0PCtw/JnreFbhzXx4ETZXRpIRewBXCbpQEQcjIjfSLpH0tZ5dbZKujNbvlfSFtsuoG0AA2biwQltr2/XXMxJkuZiTtvr2wmBFVBEAGyQ9HzT+pGsrGWdiDgl6YSkN7R6M9tV23Xb9dnZ2QK6B6Cf1HbXllSO5eu5k8ARUYuISkRURkZGyu4OgC47/c2/03IsXxEBcFTSxqb1C7OylnVsD0taK+mFAtoGMGCGPLSkcixfEQHwpKTNti+yfY6k6yTtmFdnh6QbsuUPS3o0enkqMgClqb6juqRyLN9w3jeIiFO2b5b0kKQhSXdExF7bt0qqR8QOSV+T9C+2D0h6UY2QAIBX2Pan2yQ1jvnPxZyGPKTqO6pnylEc5gQGgAHCnMAAgEURAACQKAIAABJFAABAoggAAEgUAQAAiSIAkJ6JCWl4WLIbzxPcZAxpyn0hGNBXJiak7dtfXp+be3l9GxcaIS3sASAttTZ3lGxXDgwwAgBpmWtzR8l25cAAIwCQlqE2d5RsVw4MMAIAaam2uaNku3JggA1mADDKA+1s2yZ98pMvf+MfGmqscwIYCRq8u4HOH+VxGn/kABKQ9t1AGeUBAB0ZvABglAcAdGTwAoBRHgDQkVwBYPv1th+2/Wz2fF6benO292SP+fMFF4tRHgDQkbx7ALdI2hURmyXtytZb+VVEvDV7XJuzzYUxygMAOpJrFJDt/ZIuj4hjttdLeiwifqdFvV9GxGuW+v7MCQwAS9PNUUDnR8SxbPknks5vU+9Vtuu2n7D9/pxtAgAKsOjdQG0/IumCFi9NNq9ERNhutzuxKSKO2r5Y0qO2ZyLiR23aq0qqStLo6Ohi3QMALNOiARARV7R7zfZPba9vOgR0vM17HM2eD9p+TNLbJLUMgIioSapJjUNAi24BAGBZ8h4C2iHphmz5Bkn3z69g+zzb52bL6yS9W9IzOdsFAOSUNwA+J+m9tp+VdEW2LtsV27dndX5PUt32U5K+LelzEUEAAEDJcs0IFhEvSNrSorwu6ePZ8v9IujRPOwCA4g3elcAAgI4QAACQKAIAABJFAABAoggAAEgUAQAAiSIAACBRBAAAJIoAAIBEEQAAkCgCAAASRQAAQKIIAABIFAGQuulpaWxMWrWq8Tw9PdjtAjgj1+2g0eemp6VqVTp5srF+6FBjXZLGxwevXQBncUTvzrpYqVSiXq+X3Y3BNTbW+PCdb9Mm6bnnBq9dIAG2d0dEpZO6HAJK2eHDSyvv93YBnIUASNno6NLK+71dAGfJFQC2P2J7r+3f2m67y2H7Stv7bR+wfUueNlGgqSlpzZqzy9asaZQPYrsAzpJ3D+BpSR+U9Hi7CraHJH1F0lWSLpF0ve1LcraLIoyPS7Va49i73Xiu1Vb+RGxZ7QI4S95J4fdJku2Fql0m6UBEHMzq3iNpq6Rn8rSNgoyPl/PBW1a7AM7oxjmADZKeb1o/kpUBAEq06B6A7UckXdDipcmIuL/oDtmuSqpK0ignBQFgxSwaABFxRc42jkra2LR+YVbWrr2apJrUuA4gZ9sAgDa6cQjoSUmbbV9k+xxJ10na0YV2AQALyDsM9AO2j0h6l6QHbT+Ulb/J9k5JiohTkm6W9JCkfZK+GRF783UbAJBX3lFA90m6r0X5jyVd3bS+U9LOPG0BAIrFlcAAkCgCAAASRQAAQKIIAABIFAEAAIkiAAAgUQQAACSKAACARBEAAJAoAgAAEkUAAECiCAAASBQBAACJIgAAIFEEAAAkigAAgEQRAACQKAIAABKVd07gj9jea/u3tisL1HvO9oztPbbredoEABQj15zAkp6W9EFJ/9xB3fdExM9ytgcAKEiuPYCI2BcR+4vqDNCxiQlpeFiyG88TE2X3COg73ToHEJK+ZXu37epCFW1Xbddt12dnZ7vUPfSViQlp+3Zpbq6xPjfXWCcEgCVxRCxcwX5E0gUtXpqMiPuzOo9J+puIaHl83/aGiDhq+42SHpb0lxHx+GKdq1QqUa9zygDzDA+//OHfbGhIOnWq+/0Beojt3RHR9pxss0XPAUTEFXk7FBFHs+fjtu+TdJmkRQMAaKnVh/9C5QBaWvFDQLZfbfu1p5clvU+Nk8fA8gwNLa0cQEt5h4F+wPYRSe+S9KDth7LyN9nemVU7X9J3bD8l6XuSHoyI/8rTLhJXbXMaqV05gJZyDQONiPsk3dei/MeSrs6WD0p6S552gLNs29Z4rtUah32Ghhof/qfLAXQk73UAQDm2beMDH8iJW0EAQKIIAABIFAEAAIkiAAAgUQQAACSKAACARC16L6Ay2Z6VdKjsfnRonaRBud31oGzLoGyHxLb0ol7djk0RMdJJxZ4OgH5iu97pDZh63aBsy6Bsh8S29KJB2A4OAQFAoggAAEgUAVCcWtkdKNCgbMugbIfEtvSivt8OzgEAQKLYAwCARBEAy2T7I7b32v6t7bYjAWw/Z3vG9h7bPTm/5RK25Urb+20fsH1LN/vYCduvt/2w7Wez5/Pa1JvLfh97bO/odj8XstjP2Pa5tr+Rvf5d22Pd7+XiOtiOG23PNv0ePl5GPzth+w7bx223nMjKDV/KtvUHtt/e7T4uFwGwfE9L+qA6m9ryPRHx1h4eMrbottgekvQVSVdJukTS9bYv6U73OnaLpF0RsVnSrmy9lV9lv4+3RsS13evewjr8Gd8k6ecR8WZJX5D0+e72cnFL+L/yjabfw+1d7eTSfF3SlQu8fpWkzdmjKml7F/pUCAJgmSJiX0TsL7sfRehwWy6TdCAiDkbEbyTdI2nryvduSbZKujNbvlPS+0vsy3J08jNu3sZ7JW2x7S72sRP98H+lYxHxuKQXF6iyVdJd0fCEpNfZXt+d3uVDAKy8kPQt27tt9/OchRskPd+0fiQr6yXnR8SxbPknakxH2sqrbNdtP2G7l0Kik5/xmToRcUrSCUlv6ErvOtfp/5UPZYdM7rW9sTtdWxH98LfREjOCLcD2I5IuaPHSZETc3+Hb/HFEHLX9RkkP2/7f7BtFVxW0LaVbaDuaVyIibLcb4rYp+51cLOlR2zMR8aOi+4oFPSDp7oj4te2/UGOv5k9K7lNyCIAFRMQVBbzH0ez5uO371Ng97noAFLAtRyU1f0u7MCvrqoW2w/ZPba+PiGPZLvjxNu9x+ndy0PZjkt4mqRcCoJOf8ek6R2wPS1or6YXudK9ji25HRDT3+XZJ/9CFfq2UnvjbWA4OAa0g26+2/drTy5Lep8YJ1370pKTNti+yfY6k6yT11AgaNfpzQ7Z8g6RX7NnYPs/2udnyOknvlvRM13q4sE5+xs3b+GFJj0bvXcyz6HbMO0Z+raR9Xexf0XZI+mg2Guidkk40HYrsbRHBYxkPSR9Q41jfryX9VNJDWfmbJO3Mli+W9FT22KvG4ZbS+76cbcnWr5b0QzW+LffctqhxLHyXpGclPSLp9Vl5RdLt2fIfSZrJficzkm4qu9/ztuEVP2NJt0q6Nlt+laR/lXRA0vckXVx2n5e5HX+f/U08Jenbkn637D4vsC13Szom6aXs7+QmSZ+Q9Insdasx6ulH2f+pStl97vTBlcAAkCgOAQFAoggAAEgUAQAAiSIAACBRBAAAJIoAAIBEEQAAkCgCAAAS9f8KIOwk42MhSgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.scatter(X[y == 0, 0], X[y == 0, 1], color='r')\n",
    "plt.scatter(X[y == 1, 0], X[y == 1, 1], color='g')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, seed=100)\n",
    "y_predict = kNN_classify(X_train, y_train, X_test)\n",
    "accuracy_score(y_test, y_predict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "from ML.preprocessing import StandardScaler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<ML.preprocessing.StandardScaler at 0x117ea7ac8>"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = np.array([\n",
    "    [1.0, 100],\n",
    "    [1.1, 200],\n",
    "    [0.9, 150],\n",
    "    [0.2, 190],\n",
    "    [1.0, 100],\n",
    "    [1.1, 200],\n",
    "    [0.7, 150],\n",
    "    [0.2, 190],\n",
    "    [2.1, 250],\n",
    "    [1.8, 220],\n",
    "    [2.2, 290],\n",
    "    [1.9, 270],\n",
    "    [2.1, 390],\n",
    "    [1.8, 220],\n",
    "    [2.2, 258],\n",
    "    [1.9, 360]\n",
    "])\n",
    "\n",
    "standardScaler = StandardScaler()\n",
    "standardScaler.fit(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "X2 = standardScaler.transform(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.57775121, -1.53655713],\n",
       "       [-0.42865412, -0.26798571],\n",
       "       [-0.72684829, -0.90227142],\n",
       "       [-1.77052789, -0.39484285],\n",
       "       [-0.57775121, -1.53655713],\n",
       "       [-0.42865412, -0.26798571],\n",
       "       [-1.02504246, -0.90227142],\n",
       "       [-1.77052789, -0.39484285],\n",
       "       [ 1.06231674,  0.3663    ],\n",
       "       [ 0.61502548, -0.01427143],\n",
       "       [ 1.21141382,  0.87372856],\n",
       "       [ 0.76412256,  0.62001428],\n",
       "       [ 1.06231674,  2.14229998],\n",
       "       [ 0.61502548, -0.01427143],\n",
       "       [ 1.21141382,  0.46778571],\n",
       "       [ 0.76412256,  1.76172855]])"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(X2, y, test_size=0.25, seed=100)\n",
    "y_predict = kNN_classify(X_train, y_train, X_test)\n",
    "accuracy_score(y_test, y_predict)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "数据标准化  -->  拆分训练集和测试集  -->  KNN分类器进行预测  -->  计算预测准确率"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
